<?php

namespace analysis;

use think\facade\Env;

/**
 * Dictionary-based keyword extractor.
 *
 * Scans one or more dictionary files for entries that occur as substrings of
 * a target string and returns the matched entries ordered by weight, highest
 * first.
 *
 * Dictionary format as parsed by this class: one entry per line, written as
 * either `word` or `word,weight`. An entry without a weight is given the
 * weight 100. Lines may end in LF, CRLF or CR.
 */
class Analysis {

    private $dictPath;   // Directory that contains the dictionary files
    private $key;        // Target string to be scanned for dictionary words
    private $dict;       // Dictionary file names to consult, in array iteration order
    private $num;        // Maximum number of matched words to return

    /**
     * Extract the dictionary words that occur in a string.
     *
     * Terminates the script via die() when any requested dictionary file is
     * missing or unreadable.
     *
     * @param string $key  Target string to scan
     * @param array  $dict Dictionary file names (flat array), relative to the dictionary directory
     * @param int    $num  Maximum number of matched words to return
     * @param int    $type Result shape: 1 = list of words, 2 = comma-joined string of words,
     *                     any other value = list of ['word' => ..., 'level' => ...] rows
     * @return array|string Matches in the shape selected by $type; empty array
     *                      (or empty string for type 2) when nothing matched
     */
    public function ppl($key, $dict, $num = 5, $type=1) {
        $this->key = $key;
        $this->dict = $dict;
        $this->num = $num;
        $this->dictPath = Env::get('ROOT_PATH') . 'extend/analysis/dict/';
        // Refuse to continue unless every requested dictionary is usable.
        if ($this->checkDict() === false) {
            die('The dictionary was not found or could not be read!');
        }
        $result = $this->startAnalysis();
        // Loose comparison is intentional so numeric strings such as "1" keep working.
        if ($type == 1) {
            return array_column($result, 'word');
        }
        if ($type == 2) {
            return implode(',', array_column($result, 'word'));
        }
        return $result;
    }

    /**
     * Verify that every requested dictionary exists as a readable regular file.
     *
     * On success $this->dict is replaced by a re-indexed list of the same
     * file names.
     *
     * @return bool true when all dictionaries are usable, false otherwise
     */
    private function checkDict() {
        $data = [];
        foreach ($this->dict as $v) {
            $file = $this->dictPath . $v;
            if (!is_file($file) || !is_readable($file)) {
                return false;
            }
            $data[] = $v;
        }
        $this->dict = $data;
        return true;
    }

    /**
     * Read each dictionary and collect the entries found in the target string.
     *
     * @return array List of ['word' => string, 'level' => int] rows, de-duplicated
     *               by word, sorted by level descending and truncated to $this->num
     */
    private function startAnalysis() {
        $matches = [];
        foreach ($this->dict as $v) {
            $contents = file_get_contents($this->dictPath . $v);
            if ($contents === false) continue; // unreadable at read time: contributes no matches
            // Split on any common line ending instead of PHP_EOL so a dictionary
            // parses the same way regardless of the OS it was saved on. The
            // explicit alternation is byte-safe for multibyte text.
            $lines = preg_split('/\r\n|\r|\n/', $contents);
            foreach ($lines as $line) {
                if ($line === '') continue; // blank line
                $arr = explode(',', $line);
                if ($arr[0] === '') continue; // no word before the comma
                if (strpos($this->key, $arr[0]) !== false) {
                    $level = isset($arr[1]) ? (int)$arr[1] : 100; // weight defaults to 100 when omitted
                    $matches[] = ['word' => $arr[0], 'level' => $level];
                }
            }
        }
        if (count($matches) == 0) return [];
        $array = $this->removeDuplicate($matches, 'word'); // first occurrence of each word wins
        $levels = array_column($array, 'level');
        array_multisort($levels, SORT_DESC, $array); // order rows by weight, highest first
        return array_slice($array, 0, $this->num); // cap the number of results
    }

    /**
     * De-duplicate a list of rows by the value stored under one key.
     *
     * The first row seen for each distinct value is kept; later rows with the
     * same value are dropped.
     *
     * @param array  $arr List of associative rows
     * @param string $key Row key whose value identifies duplicates
     * @return array Re-indexed list of the retained rows
     */
    private function removeDuplicate($arr, $key) {
        $res = [];
        foreach ($arr as $value) {
            if (!isset($res[$value[$key]])) {
                $res[$value[$key]] = $value;
            }
        }
        return array_values($res);
    }

}